The variable “burglary”
library(classdata)
# Pull the Burglary column out of the cities data and average it,
# dropping missing values.
A <- cities$Burglary
mean(x = A, na.rm = TRUE)
## [1] 112.6739
# Standard deviation of the same vector, again ignoring missing values.
sd(x = A, na.rm = TRUE)
## [1] 273.271
Please see the questions in the folder “Questions for cities data” in this repository.
library(ggplot2)
# Burglary against robbery, both log-transformed.
ggplot(fbiwide, aes(x = log(burglary), y = log(robbery))) +
  geom_point()
# Burglary against motor vehicle theft, both log-transformed.
ggplot(fbiwide, aes(x = log(burglary), y = log(motor_vehicle_theft))) +
  geom_point()
# Same scatterplot, with the points colored by year.
ggplot(
  fbiwide,
  aes(x = log(burglary), y = log(motor_vehicle_theft), colour = year)
) +
  geom_point()
# Same scatterplot, with the points colored by state.
ggplot(
  data = fbiwide,
  aes(x = log(burglary), y = log(motor_vehicle_theft))
) +
  geom_point(aes(color = state))
This is a bad idea: there are too many states for the colors to distinguish them from one another.
# Color the points by state and size them by population.
ggplot(
  data = fbiwide,
  aes(x = log(burglary), y = log(motor_vehicle_theft))
) +
  geom_point(aes(color = state, size = population))
# Histogram of population over every row of fbiwide.
ggplot(data = fbiwide, aes(x = population)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
The above histogram does not make sense because it includes multiple years
for the same state.
Instead, we make the histogram of population for 2019 only.
# Histogram of population restricted to the rows where year is 2019.
ggplot(
  data = fbiwide[fbiwide$year == 2019, ],
  aes(x = population)
) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Keep the years 1980 through 2019 and label each row with its decade,
# e.g. 1987 -> floor(198.7) = 198 -> "1980s".
fbiwide1 <- fbiwide[fbiwide$year >= 1980 & fbiwide$year < 2020, ]
fbiwide1$decade <- paste0(floor(fbiwide1$year / 10), "0s")
# Log burglary against log motor vehicle theft, colored by decade.
ggplot(
  data = fbiwide1,
  aes(x = log(burglary), y = log(motor_vehicle_theft))
) +
  geom_point(aes(color = decade))
# The same plot, split into one panel per state abbreviation.
ggplot(
  data = fbiwide1,
  aes(x = log(burglary), y = log(motor_vehicle_theft))
) +
  geom_point(aes(color = decade)) +
  facet_wrap(. ~ state_abbr)
# Restrict the decade-labelled data to a handful of states.
state.subset <- c(
  "California", "Colorado", "Iowa", "Illinois",
  "District of Columbia", "New York"
)
fbiwide2 <- fbiwide1[fbiwide1$state %in% state.subset, ]
# Raw counts on the log scale, one panel per selected state.
ggplot(
  data = fbiwide2,
  aes(x = log(burglary), y = log(motor_vehicle_theft))
) +
  geom_point(aes(color = decade)) +
  facet_wrap(. ~ state)
# Counts divided by population before taking logs.
# NOTE(review): the scaling constant 66424 is unexplained here -- confirm
# where it comes from.
ggplot(
  data = fbiwide2,
  aes(
    x = log(burglary / population * 66424),
    y = log(motor_vehicle_theft / population * 66424)
  )
) +
  geom_point(aes(color = decade)) +
  facet_wrap(. ~ state)
“Facet” option in ggplot
# Motor vehicle theft counts over time, one panel per state.
ggplot(data = fbiwide, aes(x = year, y = motor_vehicle_theft)) +
  geom_point() +
  facet_wrap(~state)
# The same panels with the counts log-transformed.
ggplot(data = fbiwide, aes(x = year, y = log(motor_vehicle_theft))) +
  geom_point() +
  facet_wrap(~state)
# Motor vehicle theft per capita over time, one panel per state. Each panel
# gets its own y-axis range ("free_y"). The facet_wrap() argument is spelled
# out as `scales` rather than relying on partial matching of `scale`.
ggplot(
  data = fbiwide,
  aes(x = year, y = motor_vehicle_theft / population)
) +
  geom_point() +
  facet_wrap(~state, scales = "free_y")
# Wide data (fbiwide): log robbery rate per state, drawn as horizontal boxplots.
ggplot(
  data = fbiwide,
  aes(x = state, y = log(robbery / population))
) +
  geom_boxplot() +
  coord_flip()
# Long data (fbi): keep the robbery rows and plot count / population per state.
ggplot(
  data = fbi[fbi$type == "robbery", ],
  aes(x = state, y = count / population)
) +
  geom_boxplot() +
  coord_flip()
# Horizontal boxplots of count / population per state for three crime types,
# one panel per type.
ggplot(
  data = fbi[fbi$type %in% c("homicide", "arson", "rape_legacy"), ],
  aes(x = state, y = count / population)
) +
  geom_boxplot() +
  coord_flip() +
  facet_wrap(~type)
## Warning: Removed 212 rows containing non-finite values (`stat_boxplot()`).
Boxplot of crime rate (count / population) by type of crime, faceted by state
# count / population by crime type, one panel per state, with a separate
# y-axis range in each panel.
ggplot(data = fbi, aes(x = type, y = count / population)) +
  geom_boxplot() +
  facet_wrap(~state, scales = "free_y")
## Warning: Removed 1960 rows containing non-finite values (`stat_boxplot()`).
Histogram
specify number of bins:
# Fix the number of bins at 100.
ggplot(fbiwide, aes(x = motor_vehicle_theft)) +
  geom_histogram(bins = 100) +
  ggtitle("number of bins = 100")
specify the binwidth:
# Fix the width of each bin at 5000.
ggplot(fbiwide, aes(x = motor_vehicle_theft)) +
  geom_histogram(binwidth = 5000) +
  ggtitle("binwidth = 5000")
Histogram on the density scale:
# Put the histogram on the density scale. after_stat(density) replaces the
# dot-dot form `..density..`, which was deprecated in ggplot2 3.4.0.
ggplot(fbiwide, aes(x = motor_vehicle_theft)) +
  geom_histogram(aes(y = after_stat(density)))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Bar chart over violent_crime in which each row contributes its count
# as the bar weight.
ggplot(data = fbi, aes(x = violent_crime)) +
  geom_bar(mapping = aes(weight = count))